/**
* Copyright (C) 2011 rwitzel75@googlemail.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.rwitzel.streamflyer.regex;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.junit.ComparisonFailure;
import org.junit.Test;
import com.github.rwitzel.streamflyer.core.AfterModification;
import com.github.rwitzel.streamflyer.internal.thirdparty.ZzzAssert;
import com.github.rwitzel.streamflyer.regex.MatchProcessor;
import com.github.rwitzel.streamflyer.regex.OnStreamMatcher;
import com.github.rwitzel.streamflyer.regex.RegexModifier;
import com.github.rwitzel.streamflyer.regex.ReplacingProcessor;
import com.github.rwitzel.streamflyer.util.StringUtils;
/**
* Tests {@link RegexModifier} (white-box tests). These tests may fail if the implementation of {@link RegexModifier} is
* changed.
*
* @author rwoo
* @since 23.06.2011
*/
public class RegexModifierUnitTest extends AbstractRegexModifierTest {
/**
 * A {@link RegexModifier} that records every checkpoint reported via {@link #__checkpoint(Object...)} so that a
 * test can inspect the recorded checkpoints afterwards.
 */
public class RegexModifierWithCheckpoints extends RegexModifier {

    /** The checkpoints reported so far, in reporting order. Each entry is a flat list of name-value pairs. */
    protected List<Object[]> __passedCheckpoints = new ArrayList<Object[]>();

    public RegexModifierWithCheckpoints(OnStreamMatcher matcher, MatchProcessor matchProcessor,
            int minimumLengthOfLookBehind, int newNumberOfChars) {
        super(matcher, matchProcessor, minimumLengthOfLookBehind, newNumberOfChars);
    }

    /**
     * This method is called if a certain line of code is reached ("checkpoint").
     * <p>
     * This method should be called only if the modifier is tested. Otherwise you might experience a severe
     * performance penalty.
     *
     * @param checkpointDescription
     *            A list of objects describing the checkpoint. The objects must be given as name-value pairs, i.e.
     *            the number of objects must be even.
     * @return Returns true. This allows you to use this method as side-effect in Java assertions.
     * @throws IllegalArgumentException
     *             if the description does not consist of complete name-value pairs
     */
    @Override
    protected boolean __checkpoint(Object... checkpointDescription) {
        // Fail early with a clear message instead of running into an ArrayIndexOutOfBoundsException
        // after some of the values have already been replaced.
        if (checkpointDescription.length % 2 != 0) {
            throw new IllegalArgumentException("The checkpoint description must consist of name-value pairs but "
                    + checkpointDescription.length + " objects are given");
        }

        // The values sit at the odd positions. A StringBuilder is replaced by its current content so that the
        // recorded value does not change if the builder is modified later on.
        for (int index = 1; index < checkpointDescription.length; index = index + 2) {
            if (checkpointDescription[index] instanceof StringBuilder) {
                checkpointDescription[index] = checkpointDescription[index].toString();
            }
        }

        return __passedCheckpoints.add(checkpointDescription);
    }

    /**
     * @return Returns the {@link #__passedCheckpoints}.
     */
    public List<Object[]> __passedCheckpoints() {
        return __passedCheckpoints;
    }

    /**
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return super.toString() + "[sizeOf(__passedCheckpoints)=" + __passedCheckpoints.size() + "]";
    }
}
/**
 * Creates the modifier under test as a {@link RegexModifierWithCheckpoints} that replaces the matches of the given
 * regular expression with the given replacement.
 *
 * @see com.github.rwitzel.streamflyer.regex.AbstractRegexModifierTest#createModifier(java.lang.String,
 *      java.lang.String, int, int, int)
 */
@Override
protected RegexModifier createModifier(String regex, String replacement, int minimumLengthOfLookBehind,
        int requestedCapacityOfCharacterBuffer, int flags) {

    // the matcher is created first, then the processor that replaces the matches
    OnStreamMatcher onStreamMatcher = createMatcher(regex, flags);
    MatchProcessor replacingProcessor = new ReplacingProcessor(replacement);

    return new RegexModifierWithCheckpoints(onStreamMatcher, replacingProcessor, minimumLengthOfLookBehind,
            requestedCapacityOfCharacterBuffer);
}
/**
 * Prints a description of the scenario to standard output and then delegates to the inherited assertion. The
 * result of the inherited method is returned as {@link RegexModifierWithCheckpoints}.
 *
 * @see com.github.rwitzel.streamflyer.regex.AbstractRegexModifierTest#assertReplacementByReader(java.lang.String,
 *      java.lang.String, java.lang.String, int, int, java.lang.String, int)
 */
@Override
protected RegexModifierWithCheckpoints assertReplacementByReader(String input, String regex, String replacement,
        int minimumLengthOfLookBehind, int requestedCapacityOfCharacterBuffer, String expectedOutput, int flags)
        throws Exception {

    String scenarioFormat = "Replacing '%s' with '%s' with buffer size %s (look-behind %s) shall convert"
            + "\n '%s' to \n '%s'";
    String scenario = String.format(scenarioFormat, regex, replacement, requestedCapacityOfCharacterBuffer,
            minimumLengthOfLookBehind, input, expectedOutput);
    System.out.println(scenario);

    return (RegexModifierWithCheckpoints) super.assertReplacementByReader(input, regex, replacement,
            minimumLengthOfLookBehind, requestedCapacityOfCharacterBuffer, expectedOutput, flags);
}
/**
 * Debugging hook for the recorded checkpoints. Does nothing as long as all calls in the body are commented out.
 * Enable one of them to dump the checkpoints to standard output in the corresponding format.
 *
 * @param passedCheckpoints
 *            the recorded checkpoints, each given as a flat list of name-value pairs
 */
private void print(List<Object[]> passedCheckpoints) {
// printXml(passedCheckpoints);
// printYaml(passedCheckpoints);
// printNice(passedCheckpoints);
}
/**
 * Dumps the checkpoints to standard output in an XML-like form: one {@code checkpoint} element per checkpoint
 * (numbered from 1) that contains one child element per name-value pair.
 *
 * @param passedCheckpoints
 *            the recorded checkpoints, each given as a flat list of name-value pairs
 */
@SuppressWarnings("unused")
private void printXml(List<Object[]> passedCheckpoints) {
    int number = 1;
    for (Object[] pairs : passedCheckpoints) {
        System.out.println("<checkpoint index=\"" + number + "\">");
        number++;

        // names sit at the even positions, the values directly behind them
        for (int pos = 0; pos < pairs.length; pos += 2) {
            String tag = String.valueOf(pairs[pos]);
            String content = String.valueOf(pairs[pos + 1]);
            System.out.println(" <" + tag + ">" + content + "</" + tag + ">");
        }

        System.out.println("</checkpoint>");
    }
}
/**
 * Dumps the checkpoints to standard output in a YAML-like form: one {@code checkpoint} header per checkpoint
 * (numbered from 1) followed by one {@code name: value} line per name-value pair.
 *
 * @param passedCheckpoints
 *            the recorded checkpoints, each given as a flat list of name-value pairs
 */
@SuppressWarnings("unused")
private void printYaml(List<Object[]> passedCheckpoints) {
    int number = 1;
    for (Object[] pairs : passedCheckpoints) {
        System.out.println("checkpoint: &" + number);
        number++;

        // names sit at the even positions, the values directly behind them
        for (int pos = 0; pos < pairs.length; pos += 2) {
            String key = String.valueOf(pairs[pos]);
            String content = String.valueOf(pairs[pos + 1]);
            System.out.println(" " + key + ": " + content);
        }
    }
}
/**
 * Dumps the checkpoints to standard output as a table. A header line is printed first, then two lines per
 * checkpoint:
 * <ul>
 * <li>first line: checkpoint number, checkpoint name, value of "minLen", length of the character buffer, content
 * of the character buffer, and "EOS" if the end of the stream was hit,</li>
 * <li>second line: a description of the buffer that has one symbol per character of the buffer. For the
 * checkpoint "match_n_continue" the symbols are derived from "minLen" and "minFrom", for all other checkpoints
 * from the recorded {@link AfterModification}.</li>
 * </ul>
 *
 * @param passedCheckpoints
 *            the recorded checkpoints, each given as a flat list of name-value pairs
 */
@SuppressWarnings("unused")
private void printNice(List<Object[]> passedCheckpoints) {

    System.out.println(String.format("%3s %16s | %3s %5s %s %s", "num", "name/afterMod", "loB", "buLe",
            "buffer/skipped chars", "eos?"));

    int checkpointIndex = 0;
    for (Object[] passedCheckpoint : passedCheckpoints) {
        checkpointIndex++;

        // turn the flat list of name-value pairs into a map
        Map<String, Object> data = new HashMap<String, Object>();
        for (int index = 0; index < passedCheckpoint.length; index = index + 2) {
            String key = "" + passedCheckpoint[index];
            data.put(key, passedCheckpoint[index + 1]);
        }

        String name = (String) data.get("name");
        Integer minLen = (Integer) data.get("minLen");
        String characterBuffer = (String) data.get("characterBuffer");
        Boolean endOfStreamHit = (Boolean) data.get("endOfStreamHit");
        int bufferLength = characterBuffer.length();

        // first line (a missing "endOfStreamHit" entry is treated as "not hit" instead of failing on unboxing)
        System.out.println(String.format("%3d %16s | %3d %5d '%s' %s", checkpointIndex, name, minLen,
                bufferLength, characterBuffer, Boolean.TRUE.equals(endOfStreamHit) ? "EOS" : ""));

        // second line
        if ("match_n_continue".equals(name)) {
            // "_" for the characters that are in the look-behind area,
            // ">" for the characters that are already processed,
            // "?" for the characters behind
            Integer minFrom = (Integer) data.get("minFrom");
            ZzzAssert.notNull(minFrom);

            // "_" and ">" together cover the first minFrom characters, so exactly
            // (bufferLength - minFrom) characters remain for "?"
            String bufferDescription = StringUtils.repeat("_", minLen) + StringUtils.repeat(">", minFrom - minLen)
                    + StringUtils.repeat("?", bufferLength - minFrom);

            System.out.println(String.format("%3s %16s | %3s %5s '%s'", "", " ", " ", " ", bufferDescription));
        } else {
            // an 'AfterModification' is returned:
            // "_" for the characters that are in the look-behind area,
            // "X" for the characters that are to skip,
            // "?" for the characters behind
            AfterModification mod = (AfterModification) data.get("afterModification");
            ZzzAssert.notNull(mod);

            int charactersToSkip = mod.getNumberOfCharactersToSkip();
            String bufferDescription = StringUtils.repeat("_", minLen) + StringUtils.repeat("X", charactersToSkip)
                    + StringUtils.repeat("?", bufferLength - charactersToSkip - minLen);

            Integer newMinLen = mod.getNewMinimumLengthOfLookBehind();
            Integer newCharLen = mod.getNewNumberOfChars();
            String modificationType = mod.getMessageType();
            if (modificationType.equals("MODIFY AGAIN IMMEDIATELY")) {
                modificationType = "MODIFY AGAIN"; // shorten the string so that it fits into the column
            }

            System.out.println(String.format("%3s %16s | %3d %5d '%s'", "", modificationType, newMinLen,
                    newCharLen, bufferDescription));
        }
    }
}
/**
 * Replaces the matches of the empty regular expression in an empty input with the empty string. The expectation
 * is checked against {@link String#replaceAll(String, String)} first, then against the modifier.
 */
@Test
public void testReplacement_matchEmptyString_ReplaceWithNothingSoThatNothingToSkip_AtEndStream() throws Exception {
    final String emptyRegex = "";
    final String emptyReplacement = "";
    final String emptyInput = "";
    final String expected = "";

    // reference: the behaviour of the JDK
    assertEquals(expected, emptyInput.replaceAll(emptyRegex, emptyReplacement));

    // the behaviour of the modifier
    RegexModifierWithCheckpoints usedModifier = assertReplacementByReader(emptyInput, emptyRegex,
            emptyReplacement, 0, 2, expected, 0);
    print(usedModifier.__passedCheckpoints());
}
/**
 * Tests the boundary matcher "^" with {@link Pattern#MULTILINE}. Each scenario passes a buffer and the position of
 * the first modifiable character to the modifier; the characters before that position serve as look-behind.
 * <p>
 * All three scenarios use {@link Pattern#MULTILINE}; the behaviour without that flag is covered by the
 * corresponding "noMultiline" test.
 */
@Test
public void testBoundaryMatchers1_caret_TheBeginningOfALine_multiline_correctUsage_withLookBehind()
        throws Exception {

    // test: match "^bar" in "foobar" - with look-behind
    RegexModifier modifier = createModifier("^bar", "boom", 1, 100, Pattern.MULTILINE);
    StringBuilder charBuf = new StringBuilder("obar");
    modifier.modify(charBuf, 1, false);
    assertEquals("obar", charBuf.toString()); // assert no match

    // test: match "^bar" in "foo\nbar" - with look-behind
    modifier = createModifier("^bar", "boom", 1, 100, Pattern.MULTILINE);
    charBuf = new StringBuilder("\nbar");
    modifier.modify(charBuf, 1, false);
    assertEquals("\nboom", charBuf.toString()); // assert match

    // test: match "^bar" in "bar" - nothing precedes the first modifiable character
    // (in multi-line mode "^" matches at the very beginning of the input as well)
    modifier = createModifier("^bar", "boom", 1, 100, Pattern.MULTILINE);
    charBuf = new StringBuilder("bar");
    modifier.modify(charBuf, 0, false);
    assertEquals("boom", charBuf.toString()); // assert match
}
/**
 * Tests the boundary matcher "^" without any flags. Each scenario passes a buffer and the position of the first
 * modifiable character to a fresh modifier; the characters before that position serve as look-behind.
 */
@Test
public void testBoundaryMatchers1_caret_TheBeginningOfALine_noMultiline_correctUsage_withLookBehind()
        throws Exception {

    // "^bar" in "foobar": the look-behind character is 'o' -> no match
    RegexModifier afterLetter = createModifier("^bar", "boom", 1, 100, 0);
    StringBuilder afterLetterBuffer = new StringBuilder("obar");
    afterLetter.modify(afterLetterBuffer, 1, false);
    assertEquals("obar", afterLetterBuffer.toString());

    // "^bar" in "foo\nbar": the look-behind character is a line break -> no match without multi-line mode
    RegexModifier afterLineBreak = createModifier("^bar", "boom", 1, 100, 0);
    StringBuilder afterLineBreakBuffer = new StringBuilder("\nbar");
    afterLineBreak.modify(afterLineBreakBuffer, 1, false);
    assertEquals("\nbar", afterLineBreakBuffer.toString());

    // "^bar" in "bar": nothing precedes the first modifiable character -> match
    RegexModifier atStart = createModifier("^bar", "boom", 1, 100, 0);
    StringBuilder atStartBuffer = new StringBuilder("bar");
    atStart.modify(atStartBuffer, 0, false);
    assertEquals("boom", atStartBuffer.toString());
}
/**
 * Tests the boundary matcher "$" with {@link Pattern#MULTILINE}. A single modifier is applied to three buffers in
 * a row, none of them marked as end of stream.
 */
@Test
public void testBoundaryMatchers2_dollar_TheEndOfALine_multiline() throws Exception {

    RegexModifier dollarModifier = createModifier("foo$", "hoo", 0, 100, Pattern.MULTILINE);

    // "foo": the modifier asks to be called again immediately, nothing is skipped or changed
    StringBuilder undecided = new StringBuilder("foo");
    AfterModification undecidedResult = dollarModifier.modify(undecided, 0, false);
    assertTrue(undecidedResult.isModifyAgainImmediately());
    assertEquals(0, undecidedResult.getNumberOfCharactersToSkip());
    assertEquals("foo", undecided.toString());

    // "foob" (as in "foobar"): no match, all four characters are skipped unchanged
    StringBuilder followedByLetter = new StringBuilder("foob");
    AfterModification followedByLetterResult = dollarModifier.modify(followedByLetter, 0, false);
    assertFalse(followedByLetterResult.isModifyAgainImmediately());
    assertEquals(4, followedByLetterResult.getNumberOfCharactersToSkip());
    assertEquals("foob", followedByLetter.toString());

    // "foo\n" (as in "foo\nbar"): match, "foo" is replaced
    StringBuilder followedByLineBreak = new StringBuilder("foo\n");
    AfterModification followedByLineBreakResult = dollarModifier.modify(followedByLineBreak, 0, false);
    assertFalse(followedByLineBreakResult.isModifyAgainImmediately());
    assertEquals(4, followedByLineBreakResult.getNumberOfCharactersToSkip());
    assertEquals("hoo\n", followedByLineBreak.toString());
}
/**
 * Tests the word boundary "\b" at the beginning of the regular expression. Each scenario passes a buffer and the
 * position of the first modifiable character to a fresh modifier; the characters before that position serve as
 * look-behind.
 */
@Test
public void testBoundaryMatchers3_b_AWordBoundary_AtTheBeginning_correctUsage_withLookBehind() throws Exception {

    // "\bbar" in "foobar": the look-behind character is a word character -> no match
    RegexModifier afterLetter = createModifier("\\bbar", "boom", 1, 100, 0);
    StringBuilder afterLetterBuffer = new StringBuilder("obar");
    afterLetter.modify(afterLetterBuffer, 1, false);
    assertEquals("obar", afterLetterBuffer.toString());

    // "\bbar" in "foo bar": the look-behind character is a blank -> match
    RegexModifier afterBlank = createModifier("\\bbar", "boom", 1, 100, 0);
    StringBuilder afterBlankBuffer = new StringBuilder(" bar");
    afterBlank.modify(afterBlankBuffer, 1, false);
    assertEquals(" boom", afterBlankBuffer.toString());

    // "\bbar" in "bar": nothing precedes the first modifiable character -> match
    RegexModifier atStart = createModifier("\\bbar", "boom", 1, 100, 0);
    StringBuilder atStartBuffer = new StringBuilder("bar");
    atStart.modify(atStartBuffer, 0, false);
    assertEquals("boom", atStartBuffer.toString());
}
/**
 * Tests the word boundary "\b" at the end of the regular expression. A single modifier is applied to three buffers
 * in a row, none of them marked as end of stream.
 */
@Test
public void testBoundaryMatchers3_b_AWordBoundary_AtTheEnd() throws Exception {

    RegexModifier boundaryModifier = createModifier("foo\\b", "hoo", 0, 100, 0);

    // "foo": the modifier asks to be called again immediately, nothing is skipped or changed
    StringBuilder undecided = new StringBuilder("foo");
    AfterModification undecidedResult = boundaryModifier.modify(undecided, 0, false);
    assertTrue(undecidedResult.isModifyAgainImmediately());
    assertEquals(0, undecidedResult.getNumberOfCharactersToSkip());
    assertEquals("foo", undecided.toString());

    // "foob" (as in "foobar"): no match, all four characters are skipped unchanged
    StringBuilder followedByLetter = new StringBuilder("foob");
    AfterModification followedByLetterResult = boundaryModifier.modify(followedByLetter, 0, false);
    assertFalse(followedByLetterResult.isModifyAgainImmediately());
    assertEquals(4, followedByLetterResult.getNumberOfCharactersToSkip());
    assertEquals("foob", followedByLetter.toString());

    // "foo " (as in "foo bar"): match, "foo" is replaced. Four characters are skipped because the modifier
    // continues matching so that finally all characters are skipped.
    StringBuilder followedByBlank = new StringBuilder("foo ");
    AfterModification followedByBlankResult = boundaryModifier.modify(followedByBlank, 0, false);
    assertFalse(followedByBlankResult.isModifyAgainImmediately());
    assertEquals(4, followedByBlankResult.getNumberOfCharactersToSkip());
    assertEquals("hoo ", followedByBlank.toString());
}
/**
 * Tests the non-word boundary "\B" at the beginning of the regular expression. Each scenario passes a buffer and
 * the position of the first modifiable character to a fresh modifier; the characters before that position serve
 * as look-behind.
 */
@Test
public void testBoundaryMatchers3_B_ANonWordBoundary_AtTheBeginning_correctUsage_withLookBehind() throws Exception {

    // "\B,,," in "x,,,": the look-behind character is a word character -> no match
    RegexModifier afterLetter = createModifier("\\B,,,", "boom", 1, 100, 0);
    StringBuilder afterLetterBuffer = new StringBuilder("x,,,");
    afterLetter.modify(afterLetterBuffer, 1, false);
    assertEquals("x,,,", afterLetterBuffer.toString());

    // "\B,,," in "-,,,": the look-behind character is not a word character -> match
    RegexModifier afterHyphen = createModifier("\\B,,,", "boom", 1, 100, 0);
    StringBuilder afterHyphenBuffer = new StringBuilder("-,,,");
    afterHyphen.modify(afterHyphenBuffer, 1, false);
    assertEquals("-boom", afterHyphenBuffer.toString());

    // "\B,,," in ",,,": nothing precedes the first modifiable character -> match
    RegexModifier atStart = createModifier("\\B,,,", "boom", 1, 100, 0);
    StringBuilder atStartBuffer = new StringBuilder(",,,");
    atStart.modify(atStartBuffer, 0, false);
    assertEquals("boom", atStartBuffer.toString());
}
/**
 * Tests the non-word boundary "\B" at the end of the regular expression. A single modifier is applied to three
 * buffers in a row, none of them marked as end of stream.
 */
@Test
public void testBoundaryMatchers4_B_ANonWordBoundary_AtTheEnd() throws Exception {

    RegexModifier nonBoundaryModifier = createModifier(",,,\\B", "hoo", 0, 100, 0);

    // ",,,": the modifier asks to be called again immediately, nothing is skipped or changed
    StringBuilder undecided = new StringBuilder(",,,");
    AfterModification undecidedResult = nonBoundaryModifier.modify(undecided, 0, false);
    assertTrue(undecidedResult.isModifyAgainImmediately());
    assertEquals(0, undecidedResult.getNumberOfCharactersToSkip());
    assertEquals(",,,", undecided.toString());

    // ",,,\B" in ",,,x": no match, all four characters are skipped unchanged
    StringBuilder followedByLetter = new StringBuilder(",,,x");
    AfterModification followedByLetterResult = nonBoundaryModifier.modify(followedByLetter, 0, false);
    assertFalse(followedByLetterResult.isModifyAgainImmediately());
    assertEquals(4, followedByLetterResult.getNumberOfCharactersToSkip());
    assertEquals(",,,x", followedByLetter.toString());

    // ",,,\B" in ",,,-": match, ",,," is replaced. Four characters are skipped because the modifier
    // continues matching so that finally all characters are skipped.
    StringBuilder followedByHyphen = new StringBuilder(",,,-");
    AfterModification followedByHyphenResult = nonBoundaryModifier.modify(followedByHyphen, 0, false);
    assertFalse(followedByHyphenResult.isModifyAgainImmediately());
    assertEquals(4, followedByHyphenResult.getNumberOfCharactersToSkip());
    assertEquals("hoo-", followedByHyphen.toString());
}
/**
 * Tests the boundary matcher "\A" (the beginning of the input). Each scenario passes a buffer and the position of
 * the first modifiable character to a fresh modifier; the characters before that position serve as look-behind.
 */
@Test
public void testBoundaryMatchers5_A_TheBeginningOfTheInput_correctUsage_withLookBehind() throws Exception {

    // "\Abar" in "foobar": a character precedes the first modifiable character -> no match
    RegexModifier afterLetter = createModifier("\\Abar", "boom", 1, 100, 0);
    StringBuilder afterLetterBuffer = new StringBuilder("obar");
    afterLetter.modify(afterLetterBuffer, 1, false);
    assertEquals("obar", afterLetterBuffer.toString());

    // "\Abar" in "bar": nothing precedes the first modifiable character -> match
    RegexModifier atStart = createModifier("\\Abar", "boom", 1, 100, 0);
    StringBuilder atStartBuffer = new StringBuilder("bar");
    atStart.modify(atStartBuffer, 0, false);
    assertEquals("boom", atStartBuffer.toString());
}
/**
 * Documents that the boundary matcher "\G" (the end of the previous match) is not fully supported: the last
 * scenario is expected to fail with a {@link ComparisonFailure}.
 * <p>
 * See <a href="http://www.regular-expressions.info/continue.html">Continuing at The End of The Previous Match</a>
 *
 * @throws Exception
 *             if one of the replacements fails unexpectedly
 */
@Test
public void testBoundaryMatchers6_G_TheEndOfThePreviousMatch_MISSING_FEATURE() throws Exception {

    // it's nice that this works here but this is because the regex matches at EVERY position here
    final String matchingEverywhere = "\\G(y|z)";
    assertReplacementByReader("yzyz", matchingEverywhere, "x", 1, 1024, "xxxx", 0);
    assertReplacementByReader("yzyzyzyzyzyz", matchingEverywhere, "x", 1, 2, "xxxxxxxxxxxx", 0);

    // there are other cases that are not supported:
    ComparisonFailure unsupportedCase = null;
    try {
        assertReplacementByReader("azyzazyz", "(y)|(\\Gz)", "x", 1, 2, "azxxazxx", 0);
    } catch (ComparisonFailure e) {
        unsupportedCase = e;
    }
    if (unsupportedCase == null) {
        fail("ComparisonFailure expected");
    }
    assertEquals("expected:<a[zxxaz]xx> but was:<a[xxxax]xx>", unsupportedCase.getMessage());
}
/**
 * Tests the boundary matcher "\Z" (the end of the input but for the final terminator, if any). A single modifier
 * is applied to four buffers in a row; the last two are marked as end of stream.
 * <p>
 * See <a href="http://www.regular-expressions.info/anchors.html">Strings Ending with a Line Break</a>
 *
 * @throws Exception
 *             if the modifier fails unexpectedly
 */
@Test
public void testBoundaryMatchers7_Z_TheEndOfTheInput() throws Exception {

    RegexModifier endModifier = createModifier("foo\\Z", "hoo", 0, 100, 0);

    // "foo", not the end of the stream: the modifier asks to be called again immediately
    StringBuilder undecided = new StringBuilder("foo");
    AfterModification undecidedResult = endModifier.modify(undecided, 0, false);
    assertTrue(undecidedResult.isModifyAgainImmediately());
    assertEquals(0, undecidedResult.getNumberOfCharactersToSkip());
    assertEquals("foo", undecided.toString());

    // "foob" (as in "foobar"): no match, all four characters are skipped unchanged
    StringBuilder followedByLetter = new StringBuilder("foob");
    AfterModification followedByLetterResult = endModifier.modify(followedByLetter, 0, false);
    assertFalse(followedByLetterResult.isModifyAgainImmediately());
    assertEquals(4, followedByLetterResult.getNumberOfCharactersToSkip());
    assertEquals("foob", followedByLetter.toString());

    // "foo" at the end of the stream: match, all three characters are skipped after the replacement
    StringBuilder atEnd = new StringBuilder("foo");
    AfterModification atEndResult = endModifier.modify(atEnd, 0, true);
    assertFalse(atEndResult.isModifyAgainImmediately());
    assertEquals(3, atEndResult.getNumberOfCharactersToSkip());
    assertEquals("hoo", atEnd.toString());

    // "foo\n" at the end of the stream: match in front of the final line break. Four characters are skipped
    // because the modifier continues matching so that finally all characters are skipped.
    StringBuilder beforeFinalLineBreak = new StringBuilder("foo\n");
    AfterModification beforeFinalLineBreakResult = endModifier.modify(beforeFinalLineBreak, 0, true);
    assertFalse(beforeFinalLineBreakResult.isModifyAgainImmediately());
    assertEquals(4, beforeFinalLineBreakResult.getNumberOfCharactersToSkip());
    assertEquals("hoo\n", beforeFinalLineBreak.toString());
}
/**
 * Tests the boundary matcher "\z" (lower case: the very end of the input). A single modifier is applied to four
 * buffers in a row; the last two are marked as end of stream. In contrast to "\Z", a final line break prevents the
 * match.
 */
@Test
public void testBoundaryMatchers8_Z_TheEndOfTheInput() throws Exception {

    RegexModifier veryEndModifier = createModifier("foo\\z", "hoo", 0, 100, 0);

    // "foo", not the end of the stream: the modifier asks to be called again immediately
    StringBuilder undecided = new StringBuilder("foo");
    AfterModification undecidedResult = veryEndModifier.modify(undecided, 0, false);
    assertTrue(undecidedResult.isModifyAgainImmediately());
    assertEquals(0, undecidedResult.getNumberOfCharactersToSkip());
    assertEquals("foo", undecided.toString());

    // "foob" (as in "foobar"): no match, all four characters are skipped unchanged
    StringBuilder followedByLetter = new StringBuilder("foob");
    AfterModification followedByLetterResult = veryEndModifier.modify(followedByLetter, 0, false);
    assertFalse(followedByLetterResult.isModifyAgainImmediately());
    assertEquals(4, followedByLetterResult.getNumberOfCharactersToSkip());
    assertEquals("foob", followedByLetter.toString());

    // "foo" at the end of the stream: match, all three characters are skipped after the replacement
    StringBuilder atEnd = new StringBuilder("foo");
    AfterModification atEndResult = veryEndModifier.modify(atEnd, 0, true);
    assertFalse(atEndResult.isModifyAgainImmediately());
    assertEquals(3, atEndResult.getNumberOfCharactersToSkip());
    assertEquals("hoo", atEnd.toString());

    // "foo\n" at the end of the stream: no match because of the final line break, all four characters are
    // skipped unchanged
    StringBuilder beforeFinalLineBreak = new StringBuilder("foo\n");
    AfterModification beforeFinalLineBreakResult = veryEndModifier.modify(beforeFinalLineBreak, 0, true);
    assertFalse(beforeFinalLineBreakResult.isModifyAgainImmediately());
    assertEquals(4, beforeFinalLineBreakResult.getNumberOfCharactersToSkip());
    assertEquals("foo\n", beforeFinalLineBreak.toString());
}
}